# Packages: dplyr for the grouped summaries, plotly for the interactive
# charts, countrycode for converting country names to ISO codes.
library(dplyr)
library(plotly)
library(countrycode)

# Raw data sets, read straight from the project's GitHub repository.
mug <- read.csv(
  file = "https://raw.githubusercontent.com/amy17519/FredorangeMuggers/master/FredorangeMug.csv"
)
user <- read.csv(
  file = "https://raw.githubusercontent.com/amy17519/FredorangeMuggers/master/FredorangeUser.csv"
)
# Number of rows in `mug` per country, as a plain data frame.
mug_country <- mug %>%
  group_by(Country) %>%
  summarise(Quantity = n()) %>%
  as.data.frame()

# Number of rows in `user` per country, as a plain data frame.
user_country <- user %>%
  group_by(Country) %>%
  summarise(People = n()) %>%
  as.data.frame()
# Take out users who did not indicate a country. There are 1646 of them.
# A logical mask is used rather than `-which(...)`: if nothing matches,
# `-which(...)` is `-integer(0)`, which selects zero rows and would silently
# empty the data frame. `%in%` also never yields NA, so rows with a missing
# country are kept, as before.
user_country <- user_country[
  !(user_country$Country %in% "undefined country"),
]

# Attach ISO 3166-1 alpha-3 codes, used as map locations by the choropleths.
mug_country$CountryCode <- countrycode(
  mug_country$Country, "country.name", "iso3c"
)
user_country$CountryCode <- countrycode(
  user_country$Country, "country.name", "iso3c"
)

# Copies without the USA. The match is case-insensitive because the two data
# sets were originally filtered with different spellings ("USA" and "usa");
# this removes the row whichever spelling a data set uses.
mug_country_noUSA <- mug_country[
  !(toupper(as.character(mug_country$Country)) %in% "USA"),
]
user_country_noUSA <- user_country[
  !(toupper(as.character(user_country$Country)) %in% "USA"),
]
# plotly maps
# Columns are referenced with formulas (`~Column`). plotly >= 4.0 needs this
# to look a name up in the data frame; a bare name such as `z = Quantity` is
# evaluated in the calling environment there and fails with
# "object 'Quantity' not found".

# Thin light-grey border drawn around each country.
l <- list(color = toRGB("gray85"), width = 0.5)
# Geo layout shared by all four maps: no frame, no coastlines.
g <- list(
  showframe = FALSE,
  showcoastlines = FALSE,
  projection = list(type = "equirectangular")
)

# Mug products per country.
plot_ly(
  mug_country,
  type = "choropleth",
  locations = ~CountryCode,
  z = ~Quantity,
  text = ~Country,
  color = ~Quantity,
  colors = "Oranges",
  marker = list(line = l),
  colorbar = list(title = "# of Products")
) %>%
  layout(title = "Geographic Distribution of Mug Products", geo = g)

# Mug products per country, without the USA row.
plot_ly(
  mug_country_noUSA,
  type = "choropleth",
  locations = ~CountryCode,
  z = ~Quantity,
  text = ~Country,
  color = ~Quantity,
  colors = "Oranges",
  marker = list(line = l),
  colorbar = list(title = "# of Products")
) %>%
  layout(
    title = "Geographic Distribution of Mug Products, excluding USA",
    geo = g
  )

# Collectors per country.
plot_ly(
  user_country,
  type = "choropleth",
  locations = ~CountryCode,
  z = ~People,
  text = ~Country,
  color = ~People,
  colors = "YlGnBu",
  marker = list(line = l),
  colorbar = list(title = "# of Collectors")
) %>%
  layout(title = "Geographic Distribution of Mug Collectors", geo = g)

# Collectors per country, without the USA row.
plot_ly(
  user_country_noUSA,
  type = "choropleth",
  locations = ~CountryCode,
  z = ~People,
  text = ~Country,
  color = ~People,
  colors = "GnBu",
  marker = list(line = l),
  colorbar = list(title = "# of Collectors")
) %>%
  layout(
    title = "Geographic Distribution of Mug Collectors, excluding USA",
    geo = g
  )
# Choosing k for k-means clustering: elbow plot of the total within-cluster
# sum of squares for k = 1..10. The final model in this script uses k = 5.
# kmeans() picks random starting centers, so the RNG is seeded to make the
# curve reproducible from run to run.
set.seed(123)
wss <- numeric(10)
# k = 1: the within-group sum of squares is the total sum of squares.
wss[1] <- (nrow(mug[, 7:9]) - 1) * sum(vapply(mug[, 7:9], var, numeric(1)))
for (k in 2:10) {
  wss[k] <- kmeans(mug[, 7:9], centers = k)$tot.withinss
}
plot(1:10, wss, type = "b", xlab = "Number of Clusters",
     ylab = "Within groups sum of squares")

set.seed(123)
mugCluster <- kmeans(mug[, 7:9], 5, nstart = 100)
# Observe patterns in each cluster, rename clusters by their characteristics,
# then use the result as a new variable: Difficulty.
mugCluster$centers
## Owner Seeker Trader
## 1 619.46429 137.357143 50.135714
## 2 13.95616 5.629008 1.405016
## 3 130.25000 341.781250 3.906250
## 4 123.02251 39.546624 14.241158
## 5 368.03065 154.107280 27.796935

# kmeans() numbers its clusters arbitrarily, and the numbering depends on the
# random starts (R >= 3.6 changed the default sampling algorithm, so the same
# seed need not reproduce the ids printed above). A hard-coded id-to-label
# mapping can therefore attach labels to the wrong clusters, so the order is
# derived from the fitted centers instead:
#   - "Inconclusive"          : the cluster with the fewest owners
#   - "Very Hard to Get Mugs" : the cluster with the most seekers
#   - the remaining three, by decreasing owners: Easy, Medium, Hard
# For the centers printed above this yields c(2, 1, 5, 4, 3), the order the
# labels were originally assigned in.
cluster_order <- local({
  ctr <- mugCluster$centers
  inconclusive <- unname(which.min(ctr[, "Owner"]))
  very_hard <- unname(which.max(ctr[, "Seeker"]))
  middle <- setdiff(seq_len(nrow(ctr)), c(inconclusive, very_hard))
  middle <- middle[order(ctr[middle, "Owner"], decreasing = TRUE)]
  c(inconclusive, middle, very_hard)
})
# Guard against the degenerate case where one cluster is picked for two roles.
stopifnot(!anyDuplicated(cluster_order))

# Note: this replaces the integer cluster ids inside the kmeans object with a
# labelled factor; later code reads mugCluster$cluster in that form.
mugCluster$cluster <- factor(
  mugCluster$cluster,
  levels = cluster_order,
  labels = c(
    "Inconclusive", "Easy to Find Mugs", "Medium Difficulty",
    "Hard to Get Mugs", "Very Hard to Get Mugs"
  )
)
mug$Difficulty <- mugCluster$cluster
# Scatter plot of seekers against owners for every mug, coloured by the
# Difficulty label. Columns are referenced with formulas (`~Column`), which
# plotly >= 4.0 needs in order to look names up in `data`; bare names are
# evaluated in the calling environment there and fail with "object not found".
# `Difficulty` holds the same factor as mugCluster$cluster.
plot_ly(
  data = mug,
  x = ~Owner,
  y = ~Seeker,
  type = "scatter",
  mode = "markers",
  # Hover text; the "Trader" label now carries the same ": " separator as the
  # other fields.
  text = ~paste(
    Name, "<br>Edition: ", Edition, "<br>Country: ", Country,
    "<br>City: ", City, "<br>Owner: ", Owner, "<br>Seeker: ",
    Seeker, "<br>Trader: ", Trader, "<br>Difficulty: ", Difficulty
  ),
  color = ~Difficulty,
  colors = c("olivedrab", "navyblue", "indianred2", "darkgoldenrod1", "magenta4")
) %>%
  layout(title = "K-means Clustering: # of Seekers vs. # of Owners")
# View editions and countries with the most hard-to-find and collectible mugs.
# table(mug$Edition, mugCluster$cluster)

# Keep only the four editions singled out for comparison.
popular_editions <- mug[
  mug$Edition %in% c(
    "08 Icon Edition", "13 You Are Here Series",
    "Japan Country Series", "Relief Series"
  ),
]
# Rebuild the factor so only the four retained editions remain as levels
# (one colour per level below).
popular_editions$Edition <- as.factor(as.character(popular_editions$Edition))

# Columns are referenced with formulas (`~Column`), which plotly >= 4.0 needs
# in order to look names up in `data`; bare names are evaluated in the calling
# environment there and fail with "object not found".

# Scarcity against popularity, one colour per edition.
plot_ly(
  data = popular_editions,
  x = ~Popularity,
  y = ~Scarcity,
  type = "scatter",
  mode = "markers",
  text = ~paste(
    Name, "<br>Edition: ", Edition, "<br>Country: ", Country,
    "<br>City: ", City, "<br>Owner: ", Owner, "<br>Seeker: ",
    Seeker, "<br>Trader: ", Trader, "<br>Difficulty: ", Difficulty
  ),
  color = ~Edition,
  colors = c("olivedrab3", "navyblue", "indianred2", "darkgoldenrod1")
) %>%
  layout(
    title = "Collectible Editions: Scarcity vs. Popularity",
    xaxis = list(title = "Popularity: Seeker/max(Seeker)"),
    yaxis = list(title = "Scarcity: |Owner/max(Owner)-1|")
  )

# Willingness to trade against scarcity, one colour per edition.
plot_ly(
  data = popular_editions,
  x = ~Scarcity,
  y = ~WillingnessToTrade,
  type = "scatter",
  mode = "markers",
  text = ~paste(
    Name, "<br>Edition: ", Edition, "<br>Country: ", Country,
    "<br>City: ", City, "<br>Owner: ", Owner, "<br>Seeker: ",
    Seeker, "<br>Trader: ", Trader, "<br>Difficulty: ", Difficulty
  ),
  color = ~Edition,
  colors = c("olivedrab3", "navyblue", "indianred2", "darkgoldenrod1")
) %>%
  layout(title = "Collectible Editions: Willingness to Trade vs. Scarcity")
# Cross-tabulate country against difficulty label. mug$Difficulty holds the
# same factor as mugCluster$cluster.
table(mug$Country, mug$Difficulty)
##
## Inconclusive Easy to Find Mugs Medium Difficulty
## Argentina 4 0 3
## Australia 37 5 0
## Austria 9 2 1
## Azerbaijan 0 0 0
## Bahamas 2 0 1
## Bahrain 8 0 1
## Belgium 7 3 4
## Bolivia 0 0 0
## Brazil 10 2 1
## Brunei 2 0 1
## Bulgaria 4 0 2
## Cambodia 1 0 0
## Canada 60 6 16
## Chile 3 0 1
## China 617 5 31
## Colombia 0 0 0
## Costa Rica 3 0 1
## Croatia 0 0 0
## Cyprus 5 0 1
## Czech Republic 10 2 1
## Denmark 6 1 2
## Egypt 10 0 2
## El Salvador 1 0 1
## Finland 4 0 2
## France 20 2 4
## Germany 191 22 14
## Greece 18 2 4
## Guatemala 5 0 1
## Hungary 3 1 1
## India 3 1 0
## Indonesia 83 0 3
## Ireland 3 2 0
## Israel 7 0 0
## Italy 6 0 0
## Japan 529 0 2
## Jordan 3 0 1
## Kazakhstan 2 0 0
## Kuwait 6 0 1
## Lebanon 6 0 1
## Malaysia 40 2 2
## Mexico 57 1 11
## Monaco 0 0 1
## Morocco 2 0 1
## Netherlands 14 5 9
## New Zealand 25 1 0
## Norway 3 0 4
## Oman 4 0 1
## Panama 0 0 0
## Peru 8 0 3
## Philippines 63 3 12
## Poland 12 0 9
## Portugal 7 0 2
## Puerto Rico 2 0 1
## Qatar 8 0 1
## Romania 5 0 3
## Russia 11 1 4
## Saudi Arabia 25 0 3
## Singapore 27 3 1
## South Africa 1 0 0
## South Korea 284 0 2
## Spain 23 3 3
## Sweden 10 2 2
## Switzerland 18 2 6
## Taiwan 606 2 3
## Thailand 55 5 0
## Turkey 26 2 3
## United Arab Emirates 9 3 1
## United Kingdom 37 9 4
## USA 1499 40 61
## Vietnam 16 0 4
##
## Hard to Get Mugs Very Hard to Get Mugs
## Argentina 2 0
## Australia 3 0
## Austria 0 0
## Azerbaijan 1 0
## Bahamas 0 0
## Bahrain 0 1
## Belgium 0 1
## Bolivia 1 0
## Brazil 0 0
## Brunei 0 0
## Bulgaria 0 0
## Cambodia 1 0
## Canada 5 1
## Chile 1 0
## China 52 0
## Colombia 1 0
## Costa Rica 0 0
## Croatia 0 1
## Cyprus 0 0
## Czech Republic 3 0
## Denmark 0 0
## Egypt 0 1
## El Salvador 0 0
## Finland 0 0
## France 12 0
## Germany 19 0
## Greece 2 1
## Guatemala 0 0
## Hungary 0 0
## India 0 0
## Indonesia 7 0
## Ireland 0 0
## Israel 0 0
## Italy 0 0
## Japan 14 1
## Jordan 0 0
## Kazakhstan 0 0
## Kuwait 0 0
## Lebanon 0 0
## Malaysia 10 0
## Mexico 0 9
## Monaco 0 0
## Morocco 0 0
## Netherlands 6 1
## New Zealand 4 3
## Norway 1 0
## Oman 0 0
## Panama 1 0
## Peru 0 1
## Philippines 12 0
## Poland 3 0
## Portugal 1 2
## Puerto Rico 0 0
## Qatar 0 1
## Romania 0 0
## Russia 13 1
## Saudi Arabia 0 0
## Singapore 6 0
## South Africa 1 0
## South Korea 16 0
## Spain 5 1
## Sweden 2 0
## Switzerland 1 0
## Taiwan 21 0
## Thailand 8 0
## Turkey 2 0
## United Arab Emirates 3 0
## United Kingdom 13 1
## USA 57 5
## Vietnam 1 0
# Keep only the six countries singled out for comparison.
popular_country <- mug[
  mug$Country %in% c("Canada", "China", "Germany", "Mexico", "Russia", "USA"),
]
# Rebuild the factor so only the six retained countries remain as levels
# (one colour per level below).
popular_country$Country <- as.factor(as.character(popular_country$Country))

# Columns are referenced with formulas (`~Column`), which plotly >= 4.0 needs
# in order to look names up in `data`; bare names are evaluated in the calling
# environment there and fail with "object not found".

# Scarcity against popularity, one colour per country.
plot_ly(
  data = popular_country,
  x = ~Popularity,
  y = ~Scarcity,
  type = "scatter",
  mode = "markers",
  text = ~paste(
    Name, "<br>Edition: ", Edition, "<br>Country: ", Country,
    "<br>City: ", City, "<br>Owner: ", Owner, "<br>Seeker: ",
    Seeker, "<br>Trader: ", Trader, "<br>Difficulty: ", Difficulty
  ),
  color = ~Country,
  colors = c("#F16745", "#404040", "#7BC8A4", "#4CC3D9", "#93648D", "#FFC65D")
) %>%
  layout(
    title = "Collectible Countries: Scarcity vs. Popularity",
    xaxis = list(title = "Popularity: Seeker/max(Seeker)"),
    yaxis = list(title = "Scarcity: |Owner/max(Owner)-1|")
  )

# Scarcity against willingness to trade, one colour per country. The title
# follows the "Y vs. X" order used by the other charts in this script; it
# previously named the axes the other way round.
plot_ly(
  data = popular_country,
  x = ~WillingnessToTrade,
  y = ~Scarcity,
  type = "scatter",
  mode = "markers",
  text = ~paste(
    Name, "<br>Edition: ", Edition, "<br>Country: ", Country,
    "<br>City: ", City, "<br>Owner: ", Owner, "<br>Seeker: ",
    Seeker, "<br>Trader: ", Trader, "<br>Difficulty: ", Difficulty
  ),
  color = ~Country,
  colors = c("#F16745", "#404040", "#7BC8A4", "#4CC3D9", "#93648D", "#FFC65D")
) %>%
  layout(
    title = "Collectible Countries: Scarcity vs. Willingness to Trade",
    xaxis = list(title = "Willingness To Trade"),
    yaxis = list(title = "Scarcity: |Owner/max(Owner)-1|")
  )